Read data

First we read in the processed dataset.

suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(edgeR))
suppressPackageStartupMessages(library(limma))
# load("../RData/20180725_allFT_Clincluster_12clusters.RData")
sceset <- readRDS("../rds/20180725_allFT_Clincluster_12clusters_sceset.rds")
secretory <- readRDS("../rds/20180910Fresh_secretory_5clusters_clincluster.rds")
# dim(sceset)
 # secretory <- sceset[,sceset$final.clusters %in% c(8,9,10) & sceset$source == "Fresh" & sceset$Patient != "15066L"] # 
# secretory <- secretory[,logcounts(secretory)["KRT7",] > 2 & 
#                            logcounts(secretory)["EPCAM",] > 2 & 
#                            logcounts(secretory)["PTPRC",] == 0 &
#                            logcounts(secretory)["CCDC17",] < 1 ]
FTE <- sceset[, !(sceset$final.clusters %in% c(4,6)) & sceset$Patient != "15066L"] 
FTE <- FTE[,logcounts(FTE)["EPCAM",] > 2 & logcounts(FTE)["PTPRC",] == 0]
plotTSNE(FTE, colour_by = "source")

plotTSNE(FTE, colour_by = "final.clusters")

11553

P11553 <- FTE[,FTE$Patient2 == "11553"]
P15072 <- FTE[,FTE$Patient2 == "15072"]
P15072$source[P15072$source == "2-day cultured"] <- "6-day cultured"
p1 <- plotTSNE(P11553, colour_by = "source")
p2 <- plotTSNE(P11553, colour_by = "final.clusters")
p3 <- plotTSNE(P15072, colour_by = "source")
p4 <- plotTSNE(P15072, colour_by = "final.clusters")
cowplot::plot_grid(p1,p2,p3,p4)

P15072 secretory

P15072 <- P15072[, !(P15072$final.clusters %in% c(5,11))]
plotPCA(P15072, colour_by = "source")

DE analysis P15072 secretory

matrix <- expm1(logcounts(P15072))
keep <- rowSums(matrix > 1) > 5
sum(keep)
## [1] 13820
dge <- edgeR::DGEList(counts = matrix[keep,]) # make a edgeR object
rm(matrix,keep)
design <- model.matrix(~  0 + source, data = P15072@colData)  # Use 0 because we do not need intercept for this linear model
colnames(design)
## [1] "source6-day cultured" "sourceFresh"          "sourceO.N. cultured"
colnames(design) <- gsub(pattern = " cultured", replacement = "", x = colnames(design))
colnames(design) <- gsub(pattern = "-", replacement = "", x = colnames(design))
# Transform count data to log2-counts per million (logCPM), estimate the mean-variance relationship and use this to compute appropriate observation-level weights. The data are then ready for linear modelling.
v <- voom(dge, design, plot = TRUE)

fit <- lmFit(v, design) # Linear Model for Series of Arrays
colnames(design) 
## [1] "source6day"  "sourceFresh" "sourceO.N."

DE genes

cont.matrix <- makeContrasts("sourceO.N.-sourceFresh",
                             "source6day-sourceFresh",
                             "source6day-sourceO.N.",
                             levels = design)
fit2 <- contrasts.fit(fit, cont.matrix) # Compute Contrasts from Linear Model Fit
fit2 <- eBayes(fit2) #Empirical Bayes Statistics for Differential Expression

rls <- topTable(fit2, n = Inf, coef = 1, sort = "logFC", lfc = 1, p = 0.05)
rls$gene <- rownames(rls)
# v_expr <- logcounts(secretory)[rownames(rls), secretory$initial.cluster == rownames(n_deg2)[i]]
# rls$ratio1 <- rowSums(v_expr > 0.5)/ncol(v_expr)
# v_expr <- logcounts(secretory)[rownames(rls), secretory$initial.cluster == colnames(n_deg2)[j]]
# rls$ratio2 <- rowSums(v_expr > 0.5)/ncol(v_expr)
fresh_up_ON <- rls[rls$logFC > 1,] 
fresh_down_ON <- rls[rls$logFC < -1,] 

rls <- topTable(fit2, n = Inf, coef = 2, sort = "logFC", lfc = 1, p = 0.05 )
rls$gene <- rownames(rls)
fresh_up_6d <- rls[rls$logFC > 1,] 
fresh_down_6d <- rls[rls$logFC < -1,] 

rls <- topTable(fit2, n = Inf, coef = 3, sort = "logFC", lfc = 1, p = 0.05 )
rls$gene <- rownames(rls)
ON_up_6d <- rls[rls$logFC > 1,] 
ON_down_6d <- rls[rls$logFC < -1,] 
sum(fresh_up_ON$gene %in% fresh_up_6d$gene)
## [1] 1338
P15072$source <- factor(P15072$source, levels = c("Fresh","O.N. cultured", "6-day cultured"))
plotExpression(P15072, features = head(fresh_up_ON$gene,12), x = "source", ncol = 3)

plotExpression(P15072, features = head(fresh_down_ON$gene[fresh_down_ON$gene %in% ON_down_6d$gene],15), x = "source", ncol = 3, colour_by = "source")

CRISP3 is expressed highly in protein atlas. NR4A1 is expressed in fallopian tube. High Expression of Orphan Nuclear Receptor NR4A1 in a Subset of Ovarian Tumors with Worse Outcome (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5154956/)

SERPING1 is special. It is rarely expressed in normal FT but expressed in OvCa.

plotExpression(P15072, features = c("PTGS1","SERPING1"), x = "source")

GORilla

# write.table(rownames(dge), quote = F, sep = "\n", file = "../GOrilla input/20180914_bg_genes.txt", row.names = F, col.names = F)
# write.table(fresh_up_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_ON_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_ON_DEGs.txt", row.names = F, col.names = F)

# write.table(fresh_up_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_6d_DEGs.txt", row.names = F, col.names = F)

# write.table(ON_down_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_ON_down_6d_DEGs.txt", row.names = F, col.names = F)
# write.table(ON_up_6d$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_ON_up_6d_DEGs.txt", row.names = F, col.names = F)

# write.table(fresh_up_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_up_ON_DEGs.txt", row.names = F, col.names = F)
# write.table(fresh_down_ON$gene, quote = F, sep = "\n", file = "../GOrilla input/20180914_fresh_down_ON_DEGs.txt", row.names = F, col.names = F)

Downregulated GOs compared to Fresh:

Venn plots

dim(fresh_up_ON)
## [1] 2006    7
dim(fresh_down_ON)
## [1] 438   7
dim(fresh_up_6d)
## [1] 2856    7
dim(fresh_down_6d)
## [1] 407   7
dim(ON_up_6d)
## [1] 1425    7
dim(ON_down_6d)
## [1] 551   7
tmp <- venn::venn(list(fUPon=fresh_up_ON$gene, 
                       fUP6d=fresh_up_6d$gene,
                       onUP6d= ON_up_6d$gene),
                  ilab=TRUE, zcolor = "style")

# attr(tmp, "intersections")
tmp <- venn::venn(list(fDOWNo = fresh_down_ON$gene,
                       fDown6 = fresh_down_6d$gene,
                       oDOWN6= ON_down_6d$gene), 
                  ilab=TRUE,
                  zcolor = "style")

# attr(tmp, "intersections")

Inconsistent change

tmp <- venn::venn(list(fresh_down_ON = fresh_down_ON$gene,
                       ON_up_6d= ON_up_6d$gene), 
                  ilab=TRUE,
                  zcolor = "style")

tmp2 <- attr(tmp, "intersections")
tmp2$`fresh_down_ON:ON_up_6d`
##   [1] "FOS"       "CRIP2"     "GAS6"      "UCP2"      "NGFRAP1"  
##   [6] "ASAH1"     "IFITM1"    "RNASET2"   "LY6E"      "APOBEC3C" 
##  [11] "CTGF"      "ECH1"      "GSN"       "CAT"       "HMGN3"    
##  [16] "FOLR1"     "LGALS3BP"  "CTSH"      "HMGN2"     "IFITM2"   
##  [21] "PTGR1"     "CTSD"      "MAGED1"    "PHPT1"     "ADIRF"    
##  [26] "TMEM98"    "ATRAID"    "MGMT"      "IFITM3"    "CLDN10"   
##  [31] "BDH2"      "CMTM6"     "TECR"      "SHMT1"     "GSTK1"    
##  [36] "CST3"      "KLK11"     "HSPB1"     "ETFB"      "ATP5G2"   
##  [41] "UBB"       "ALDH7A1"   "ATPIF1"    "PON2"      "HMGB2"    
##  [46] "FXYD3"     "RCN2"      "AKR7A2"    "DYNLL1"    "SUMF2"    
##  [51] "PRDX2"     "GSTT1"     "EMP2"      "KRTCAP3"   "GRN"      
##  [56] "SEPW1"     "BCKDHA"    "SYNE2"     "SORBS2"    "ALKBH7"   
##  [61] "SH3BGRL"   "QPRT"      "CCNG1"     "ECHDC2"    "DNPH1"    
##  [66] "NDUFA13"   "PRDX3"     "TOMM7"     "C19orf60"  "PDIA4"    
##  [71] "PEBP1"     "IDH2"      "MGST2"     "PLLP"      "PRCP"     
##  [76] "NINJ1"     "ADI1"      "TSPAN1"    "TMEM66"    "CRIP1"    
##  [81] "LSMD1"     "PEMT"      "TTC3"      "HINT2"     "CETN2"    
##  [86] "SUMF1"     "MGST3"     "ERGIC3"    "ASRGL1"    "TSPAN6"   
##  [91] "APLP2"     "CD24"      "ISOC1"     "TMEM230"   "ASS1"     
##  [96] "GLG1"      "COX5B"     "ECHS1"     "TMEM219"   "LOC440335"
## [101] "MYL9"      "SCRN2"     "PSMB9"     "CPVL"      "FKBP9"    
## [106] "LRPAP1"    "C9orf16"   "UQCRQ"     "TMEM9"     "PDLIM1"   
## [111] "HIGD2A"    "TXN2"      "PSME1"     "TRIM22"    "CALCOCO1" 
## [116] "COA3"      "TMEM14A"   "DHRS7"     "SCP2"      "MFAP2"    
## [121] "BSG"       "GPX3"      "CTSC"      "HADH"      "ANKRD65"  
## [126] "SUCLG2"    "NDUFV1"    "GEMIN8"    "SPON1"     "MARCKSL1" 
## [131] "AGR2"      "OST4"      "SMPDL3B"   "ALDH3B1"   "IFI6"     
## [136] "SYK"       "LAMB2"     "MT2A"      "FAM127A"   "LBH"      
## [141] "EID1"      "DGCR6L"    "MAP1LC3B"  "PBXIP1"
plotExpression(P15072, features = head(tmp2$`fresh_down_ON:ON_up_6d`), x = "source", ncol = 2, colour_by = "source")

p1 <- plotTSNE(P15072, colour_by = c("GAS6"))
p2 <- plotTSNE(P15072, colour_by = c("CRIP2"))
p3 <- plotTSNE(P15072, colour_by = c("FOS"))
p4 <- plotTSNE(P15072, colour_by = c("UCP2"))
cowplot::plot_grid(p1,p2,p3,p4, ncol = 2)

# write.table(tmp2$`fresh_down_ON:ON_up_6d`, quote = F, sep = "\n", file = "../GOrilla input/20180914fresh_down_ON:ON_up_6d.txt", row.names = F, col.names = F)
tmp <- venn::venn(list(fresh_up_ON = fresh_up_ON$gene,
                       ON_down_6d= ON_down_6d$gene), 
                  ilab=TRUE,
                  zcolor = "style")

tmp2 <- attr(tmp, "intersections")
tmp2$`fresh_up_ON:ON_down_6d`[1:20]
##  [1] "AXL"      "MARS"     "CDCP1"    "CD44"     "SLC1A5"   "PSMD12"  
##  [7] "TNFRSF21" "EMP3"     "STIP1"    "FOSL1"    "HSPA4"    "TXNRD1"  
## [13] "NOP16"    "ADPRHL2"  "GRWD1"    "CADM1"    "GTPBP4"   "UPP1"    
## [19] "PSMD11"   "ANXA5"
# write.table(tmp2$`fresh_up_ON:ON_down_6d`, quote = F, sep = "\n", file = "../GOrilla input/20180914fresh_up_ON:ON_down_6d.txt", row.names = F, col.names = F)

Heatmap

Choose some GOs for heatmap

# library(tidyverse)
gos <- readxl::read_excel(path = "../Gorilla output/20180914culturing_GOs.xlsx", sheet = "Summary")
head(gos )
## # A tibble: 6 x 11
##   X__1  `GO Term` Description `P-value` `FDR q-value` Enrichment     N
##   <chr> <chr>     <chr>           <dbl>         <dbl>      <dbl> <dbl>
## 1 <NA>  GO:00193… fatty acid…  7.65e- 6      5.44e- 3       9.63 12808
## 2 up_ON GO:00063… RNA proces…  1.97e-32      1.40e-28       2.03 12808
## 3 <NA>  GO:00511… localizati…  6.72e-16      4.35e-13       1.28 12808
## 4 <NA>  GO:00023… immune sys…  7.39e- 8      1.06e- 5       1.33 12808
## 5 <NA>  GO:00329… secretion …  2.51e- 6      2.49e- 4       1.43 12808
## 6 <NA>  GO:00017… cell activ…  6.23e- 6      5.76e- 4       1.4  12808
## # ... with 4 more variables: B <dbl>, n <dbl>, b <dbl>, Genes <chr>
go_genes <- gos$Genes
go_genes <- unlist(strsplit(go_genes , split = ", "))
go_genes <- unique(go_genes)
go_genes <- go_genes[grep(" - ", go_genes )]
go_genes <- sapply(go_genes, function(x) return(unlist(strsplit(x, "  -  "))[1]))
names(go_genes) <- NULL
go_genes <- unlist(go_genes)

go_genes <- gsub(pattern = "[[]", replacement = "", x = go_genes)
go_genes <- unique(go_genes)
go_genes <- data.frame(gene = go_genes,
                       GO = sapply(go_genes, function(x) return(gos$Description[grep(x, gos$Genes)][1]  )) )

go_genes <- go_genes[order(go_genes$GO, decreasing = F),]
head(go_genes)
##              gene              GO
## GCLM         GCLM cell activation
## GNB1         GNB1 cell activation
## PRMT5       PRMT5 cell activation
## TOP1         TOP1      cell death
## SERPINB9 SERPINB9      cell death
## NR3C1       NR3C1      cell death

Response

data.plot <- logcounts(P15072)[match(go_genes$gene, rownames(P15072)), ]

colanno <- data.frame(source = P15072$source)
rownames(colanno) <- colnames(data.plot)
colanno <- colanno[order(colanno$source, decreasing = F),, drop = F]

data.plot <- data.plot[,rownames(colanno)]
data.plot <- data.plot[rowSums(data.plot > 0.5) >= 5,]
data.plot <- t(scale(t(data.plot), center = T, scale = T))
data.plot <- Seurat::MinMax(data.plot , min = -2.5, max = 2.5)
data.plot[1:10,1:10]
##          15072L-p2-A02 15072L-p2-A03 15072L-p2-A04 15072L-p2-A05
## GCLM        -0.7293457    -0.7293457   -0.72934566    -0.7293457
## GNB1         2.5000000    -0.7045463    2.33193383    -0.7045463
## PRMT5       -0.9495120     1.0722717   -0.94951202    -0.9495120
## TOP1        -1.0185774    -1.0185774   -1.01857741    -1.0185774
## SERPINB9    -0.7458360     1.1165037    0.33882419    -0.7458360
## NR3C1       -0.4094784    -0.5410116   -0.54101161    -0.5410116
## GPR64       -0.5160526     1.6603375    2.34135340    -0.5160526
## SEMA4A       1.2619586    -0.3895052   -0.08719182    -0.4155336
## ITM2B        0.1855766    -0.1921770    0.44632012    -0.5670452
## CPE         -0.4688120    -0.4688120    2.22818229    -0.4688120
##          15072L-p2-A09 15072L-p2-A11 15072L-p2-A12 15072L-p2-A18
## GCLM        -0.7293457    0.31798348     0.7626400  -0.729345660
## GNB1        -0.7045463   -0.04305448    -0.7045463  -0.704546262
## PRMT5       -0.9495120    1.14490276    -0.9495120  -0.949512016
## TOP1        -1.0185774    0.92745022     1.0694756  -0.283151233
## SERPINB9    -0.5227650    1.49784182    -0.7458360   1.569712400
## NR3C1       -0.5410116   -0.54101161    -0.5410116   2.424453519
## GPR64       -0.5160526    1.16193540    -0.5160526  -0.516052597
## SEMA4A      -0.4817796   -0.48177956    -0.4817796  -0.481779562
## ITM2B        0.4871042    0.19476472    -0.2840381   0.322706423
## CPE          2.2811520   -0.46881200    -0.4688120   0.006598591
##          15072L-p2-B01 15072L-p2-B02
## GCLM        -0.7293457    -0.7293457
## GNB1        -0.7045463    -0.7045463
## PRMT5       -0.9495120    -0.9495120
## TOP1         0.9186643    -1.0185774
## SERPINB9     1.1743149     0.9853121
## NR3C1       -0.5410116     2.3726608
## GPR64       -0.5160526    -0.5160526
## SEMA4A      -0.4817796    -0.4817796
## ITM2B        0.1821424     0.5158808
## CPE         -0.3299672    -0.4688120
go_genes <- go_genes[go_genes$gene %in% rownames(data.plot),, drop = F]

my_colours <- colorRampPalette(c("steelblue4", "white", "firebrick2"))(200)
pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("response", go_genes$GO)]),],  color = my_colours,
                   show_rownames = T, 
                   show_colnames = F, 
                   cluster_rows = T, cluster_cols = F,annotation_col = colanno)

dim(data.plot)
## [1] 1431  538
table(go_genes$GO)
## 
##                              cell activation 
##                                            3 
##                                   cell death 
##                                            3 
##      cell surface receptor signaling pathway 
##                                           19 
##                 fatty acid metabolic process 
##                                            4 
##                         fatty acid oxidation 
##                                            7 
##                        immune system process 
##                                           90 
##                                 localization 
##                                          611 
##                   mitotic cell cycle process 
##                                           81 
## negative regulation of programmed cell death 
##                                           10 
##                        programmed cell death 
##                                           61 
##                 regulation of cell migration 
##                                           27 
##             regulation of cell proliferation 
##                                           18 
##          regulation of immune system process 
##                                           14 
##            regulation of signal transduction 
##                                           47 
##                         response to stimulus 
##                                          167 
##                               RNA processing 
##                                          265 
##                            secretion by cell 
##                                            1

Immune

pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("immune", go_genes$GO)]),],  color = my_colours,
                   show_rownames = T, 
                   show_colnames = F, 
                   cluster_rows = F, cluster_cols = F,annotation_col = colanno)

pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("fatty acid", go_genes$GO)]),],  color = my_colours,
                   show_rownames = T, 
                   show_colnames = F, 
                   cluster_rows = F, cluster_cols = F,annotation_col = colanno)

pheatmap::pheatmap(data.plot[as.character(go_genes$gene[grep("cell death", go_genes$GO)]),],  color = my_colours,
                   show_rownames = T, 
                   show_colnames = F, 
                   cluster_rows = F, cluster_cols = F,annotation_col = colanno)

6-day cells

Cell cycle may happen in the cells

Initiation of centresome duplication CDK2 CDK4 CDK6 PLK2 PLK6 AURKA

MAD2L1 is a component of the mitotic spindle assembly checkpoint that prevents the onset of anaphase until all chromosomes are properly aligned at the metaphase plate.

CDKN1A/p21

This gene encodes a potent cyclin-dependent kinase inhibitor. The encoded protein binds to and inhibits the activity of cyclin-cyclin-dependent kinase2 or -cyclin-dependent kinase4 complexes, and thus functions as a regulator of cell cycle progression at G1. The expression of this gene is tightly controlled by the tumor suppressor protein p53, through which this protein mediates the p53-dependent cell cycle G1 phase arrest in response to a variety of stress stimuli.

plotTSNE(P15072, colour_by = c("CDKN1A"))

Try

Diffusion map

Diffusion map is not working well

# P15072 <- P15072[, !(P15072$final.clusters %in% c(5,11))]
# # source("https://bioconductor.org/biocLite.R")
# # biocLite("destiny")
# library(ggthemes)
# library(destiny)
# cellLabels <- colData(P15072)$source
# diff.map <- logcounts(P15072)
# colnames(diff.map) <- cellLabels
# dm <- DiffusionMap(t(diff.map))
# 
# tmp <- data.frame(DC1 = eigenvectors(dm)[,1],
#                   DC2 = eigenvectors(dm)[,2],
#                   Timepoint = cellLabels)
# ggplot(tmp, aes(x = DC1, y = DC2, colour = Timepoint)) +
#     geom_point() + scale_color_tableau() + 
#     xlab("Diffusion component 1") + 
#     ylab("Diffusion component 2") +
#     theme_classic()
# plotTSNE(FTE2, colour_by = "initial.cluster")